﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using CrawlerCore;
using WCrawlerLib;
using HtmlAgilityPack;
using System.IO;


namespace WCrawler.TestPages
{
    /// <summary>
    /// Code-behind for a test page that crawls a thesis listing page of the
    /// site at <see cref="SiteRoot"/> and stores the results under
    /// <see cref="DownloadRoot"/>: either the PDF files of every thesis
    /// (<see cref="btnLogin_Click"/>) or a numbered list of the text found in
    /// the third column of the listing (<see cref="btnGetIndexAndName_Click"/>).
    /// </summary>
    public partial class frmDHKHTN : System.Web.UI.Page
    {
        /// <summary>Scheme, host and port prepended to the site-relative hrefs read from the crawled pages.</summary>
        private const string SiteRoot = "http://www.nsl.hcmus.edu.vn:8989";

        // NOTE(review): these credentials are hard-coded in source and travel in the
        // query string over plain HTTP. Consider moving them to configuration.
        /// <summary>Query-string suffix appended to every thesis page URL.</summary>
        private const string LoginQuery = "&un=caohoc&pw=thuvien";

        /// <summary>Local folder under which one sub-folder per year is created.</summary>
        private const string DownloadRoot = @"e:\Documents\MasterThesis";

        /// <summary>Selects the anchors in the first column of the listing table.</summary>
        private const string ThesisLinkXPath = ".//table[@id='group_top']/tr/td[1]/a";

        /// <summary>Selects the third-column cells of the listing table.</summary>
        private const string ThesisTitleXPath = ".//table[@id='group_top']/tr/td[3]";

        /// <summary>Selects the first-column anchors of the tables on a thesis page that links to a PDF.</summary>
        private const string PdfLinkXPath = ".//table[//a[contains(@href,'.pdf') or contains(@href,'.PDF')]]/tr/td[1]//a";

        /// <summary>Page load handler; intentionally does nothing.</summary>
        protected void Page_Load(object sender, EventArgs e)
        {

        }

        /// <summary>
        /// Downloads the PDF files of every thesis linked from the listing page
        /// whose URL is in <c>txtLinkOfYear</c>, starting at the 1-based
        /// position given in <c>txtIndexThesis</c>.
        /// </summary>
        /// <remarks>
        /// Example listing URL (year 2011):
        /// http://www.nsl.hcmus.edu.vn:8989/greenstone/cgi-bin/library.cgi?e=d-01000-00---off-0thesiskh--00-1----0-10-0---0---0direct-10---4-------0-1l--11-vi-50---20-about---00-3-1-00-0-0-11-1-0utfZz-8-00&amp;a=d&amp;c=thesiskh&amp;cl=CL5.1.22
        /// Files are saved as <c>DownloadRoot\year\NN\MM_name</c>, where NN is
        /// the position of the thesis in the listing and MM the running number
        /// of the successfully downloaded file.
        /// </remarks>
        /// <exception cref="FormatException"><c>txtIndexThesis</c> does not contain an integer.</exception>
        protected void btnLogin_Click(object sender, EventArgs e)
        {
            // The year is URL-encoded before being used as a folder name (kept from
            // the original so existing folder names stay the same).
            string yearFolder = Path.Combine(DownloadRoot, HttpUtility.UrlEncode(txtYear.Text));

            // Parse the raw text: URL-encoding it first only turned otherwise valid
            // input (e.g. trailing whitespace) into a parse failure.
            int firstThesis = int.Parse(txtIndexThesis.Text);

            BrowserSession_General br = new BrowserSession_General();
            HtmlDocument listThesis = br.Get(txtLinkOfYear.Text);
            HtmlNodeCollection thesisLinks = listThesis.DocumentNode.SelectNodes(ThesisLinkXPath);

            // The year folder is created even when the listing turns out to be empty.
            CreateFolder(yearFolder);

            // SelectNodes returns null, not an empty collection, when nothing matches.
            if (thesisLinks == null)
            {
                return;
            }

            using (System.Net.WebClient wc = new System.Net.WebClient())
            {
                int thesisNumber = 0;
                foreach (HtmlNode thesisLink in thesisLinks)
                {
                    thesisNumber++;

                    // Skip the theses located before the requested start position.
                    if (thesisNumber < firstThesis)
                    {
                        continue;
                    }

                    HtmlAttribute href = thesisLink.Attributes["href"];
                    if (href == null)
                    {
                        System.Diagnostics.Trace.TraceWarning(
                            "Thesis #{0} has no href attribute; skipped.", thesisNumber);
                        continue;
                    }

                    string thesisFolder = Path.Combine(yearFolder, FormatIndex(thesisNumber));
                    CreateFolder(thesisFolder);
                    DownloadThesisPdfs(br, wc, SiteRoot + href.Value + LoginQuery, thesisFolder);
                }
            }
        }

        /// <summary>
        /// Fetches one thesis page and downloads every file linked from it into
        /// <paramref name="thesisFolder"/>, prefixing each file name with a
        /// two-digit running number.
        /// </summary>
        /// <param name="br">Session used to fetch the thesis page.</param>
        /// <param name="wc">Client used to download the files.</param>
        /// <param name="thesisUrl">Absolute URL of the thesis page.</param>
        /// <param name="thesisFolder">Existing folder that receives the files.</param>
        private static void DownloadThesisPdfs(BrowserSession_General br, System.Net.WebClient wc, string thesisUrl, string thesisFolder)
        {
            HtmlDocument thesisPage = br.Get(thesisUrl);
            HtmlNodeCollection pdfLinks = thesisPage.DocumentNode.SelectNodes(PdfLinkXPath);

            // A page without any matching link yields null; previously this crashed
            // the whole run with a NullReferenceException.
            if (pdfLinks == null)
            {
                // The folder is logged instead of the URL because the URL carries credentials.
                System.Diagnostics.Trace.TraceWarning(
                    "No PDF links found for thesis folder {0}.", thesisFolder);
                return;
            }

            int pdfNumber = 1;
            foreach (HtmlNode pdfLink in pdfLinks)
            {
                HtmlAttribute href = pdfLink.Attributes["href"];
                if (href == null)
                {
                    continue;
                }

                string pdfHref = href.Value;

                // File name = last path segment of the href.
                string sourceName = pdfHref.Substring(pdfHref.LastIndexOf('/') + 1);

                try
                {
                    // Built inside the try block: Path.Combine may reject characters
                    // that are legal in a URL but not in a file-system path.
                    string target = Path.Combine(thesisFolder, FormatIndex(pdfNumber) + "_" + sourceName);
                    wc.DownloadFile(SiteRoot + pdfHref, target);
                }
                catch (Exception ex)
                {
                    // Best effort: record the failure and move on to the next file.
                    // The running number is not advanced, so it is reused by the
                    // next successful download.
                    System.Diagnostics.Trace.TraceWarning(
                        "Download of {0} failed: {1}", pdfHref, ex.Message);
                    continue;
                }

                pdfNumber++;
            }
        }

        /// <summary>Formats a counter with at least two digits (1 becomes "01", 12 stays "12").</summary>
        private static string FormatIndex(int index)
        {
            return index.ToString("D2");
        }

        /// <summary>
        /// Creates <paramref name="folderPath"/> if it does not exist yet.
        /// A failure to create the directory is traced and otherwise ignored.
        /// </summary>
        /// <param name="folderPath">Path of the directory to create.</param>
        public void CreateFolder(string folderPath)
        {
            // Constructed outside the try block so that an invalid path still
            // surfaces to the caller, as it did before.
            DirectoryInfo di = new DirectoryInfo(folderPath);
            try
            {
                // Create() does nothing when the directory already exists, so no
                // separate existence check is needed.
                di.Create();
            }
            catch (Exception e)
            {
                // Console output is not visible in a web application; trace instead.
                System.Diagnostics.Trace.TraceError("The process failed: {0}", e.ToString());
            }
        }

        /// <summary>
        /// Appends one line per third-column cell of the listing page whose URL
        /// is in <c>txtLinkOfYear</c> to <c>DownloadRoot\year\list.txt</c>, each
        /// in the form <c>NN_text</c> where NN is the 1-based position of the cell.
        /// </summary>
        /// <remarks>
        /// <c>txtIndexThesis</c> is not used here, so it is no longer parsed; an
        /// empty or invalid value in that box does not abort this handler.
        /// </remarks>
        protected void btnGetIndexAndName_Click(object sender, EventArgs e)
        {
            string yearFolder = Path.Combine(DownloadRoot, HttpUtility.UrlEncode(txtYear.Text));

            BrowserSession_General br = new BrowserSession_General();
            HtmlDocument listThesis = br.Get(txtLinkOfYear.Text);
            HtmlNodeCollection titleCells = listThesis.DocumentNode.SelectNodes(ThesisTitleXPath);

            CreateFolder(yearFolder);

            // The using block closes the file even when nothing matched or a write
            // fails; previously the writer leaked in both cases.
            using (TextWriter writer = new StreamWriter(Path.Combine(yearFolder, "list.txt"), true))
            {
                if (titleCells == null)
                {
                    return;
                }

                int thesisNumber = 1;
                foreach (HtmlNode cell in titleCells)
                {
                    writer.WriteLine(FormatIndex(thesisNumber) + "_" + cell.InnerText);
                    thesisNumber++;
                }
            }
        }
    }
}